from pathlib import Path
from datetime import time
import pandas as pd
import plotly.io as pio
import plotly.offline as pyo
pyo.init_notebook_mode()
# pio.renderers.default = "iframe"
import plotly.express as px
# Input data: two zipped PV_Live CSV exports, read with pandas further down.
# See https://drive.google.com/drive/folders/1IRt4NXV3SKq-z-7r83w2kcWv4zf0U1Bt?usp=sharing
# Estimates as published intraday (tagged source="intraday" once loaded).
INTRADAY_PV_LIVE_FILE = Path("PV_Live_intraday_20160901_to_20230531.zip")
# Retrospectively revised estimates (tagged source="retrospective" once loaded).
RETROSPECTIVE_PV_LIVE_FILE = Path("PV_Live_latest_as_of_20230620_from_20160901_to_20230531.zip")
# The gsp_id whose rows are selected for the plots and the intraday-vs-retrospective comparison.
# NOTE(review): presumably 0 is the national aggregate — confirm against the PV_Live documentation.
GSP_ID = 0
# Load the intraday export, parsing both timestamp columns as datetimes, and
# tag every row with its origin so it can be told apart after concatenation.
pvlive_intraday = pd.read_csv(
    INTRADAY_PV_LIVE_FILE,
    parse_dates=["updated_gmt", "datetime_GMT"],
).assign(source="intraday")
pvlive_intraday.head()
| gsp_id | updated_gmt | datetime_GMT | generation_MW | capacity_MWp | installedcapacity_MWp | site_count | source | |
|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 2023-06-14 16:53:50 | 2016-09-01 | 0.0 | 11359.093000 | 11548.010000 | NaN | intraday |
| 1 | 1 | 2023-06-15 04:20:31 | 2016-09-01 | 0.0 | 142.923700 | 145.673720 | NaN | intraday |
| 2 | 2 | 2023-06-15 04:20:31 | 2016-09-01 | 0.0 | 18.067002 | 18.206564 | NaN | intraday |
| 3 | 3 | 2023-06-15 04:20:31 | 2016-09-01 | 0.0 | 85.587469 | 86.701236 | NaN | intraday |
| 4 | 4 | 2023-06-15 04:20:31 | 2016-09-01 | 0.0 | 2.331996 | 2.376530 | NaN | intraday |
# Load the retrospective export the same way as the intraday one: parse both
# timestamp columns and label every row with its origin.
pvlive_retro = pd.read_csv(
    RETROSPECTIVE_PV_LIVE_FILE,
    parse_dates=["updated_gmt", "datetime_GMT"],
).assign(source="retrospective")
pvlive_retro.head()
| gsp_id | updated_gmt | datetime_GMT | generation_MW | capacity_MWp | installedcapacity_MWp | site_count | calculation_time_s | source | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 2022-07-20 22:49:45 | 2016-09-01 | 0.0 | 11352.312000 | 11540.967000 | 7031.0 | 0 | retrospective |
| 1 | 1 | 2022-08-05 11:40:47 | 2016-09-01 | 0.0 | 142.918330 | 145.665060 | 16695.0 | 0 | retrospective |
| 2 | 2 | 2022-08-05 11:40:47 | 2016-09-01 | 0.0 | 18.063546 | 18.202709 | 16695.0 | 0 | retrospective |
| 3 | 3 | 2022-08-05 11:40:47 | 2016-09-01 | 0.0 | 85.587641 | 86.699495 | 16695.0 | 0 | retrospective |
| 4 | 4 | 2022-08-05 11:40:47 | 2016-09-01 | 0.0 | 2.331981 | 2.376462 | 16695.0 | 0 | retrospective |
# Stack both sources into one long table with a fresh 0..n-1 index.
pvlive = pd.concat([pvlive_intraday, pvlive_retro], ignore_index=True)
# Yield = generation normalised by installed capacity.
pvlive["yield_MW_per_MWp"] = pvlive["generation_MW"].div(pvlive["installedcapacity_MWp"])
# Plot the number of sites used over time, one line per source, for the
# selected GSP. Rows without a site_count are dropped before plotting.
is_selected_gsp = pvlive["gsp_id"] == GSP_ID
has_site_count = pvlive["site_count"].notna()
fig = px.line(
    pvlive[is_selected_gsp & has_site_count],
    x="datetime_GMT",
    y="site_count",
    color="source",
    height=500,
)
fig.show()
Note that there are several short-lived outages in one or both of the PV generation data sources, which means that the retrospective revision does not always use more systems than the intraday calculation.
Note also that the number reported in the site_count field is the number of systems used by the PV_Live model after removing anomalous readings and, in the case of the national model, selecting an optimal subsample for geographical distribution.
Note also that the PV_Live model automatically sets overnight generation to 0; in that case the site_count field is NULL/NA, since no sample data was used.
N.B. The total available sample is typically:
# Reshape the selected GSP from long to wide: one row per datetime_GMT, with
# the intraday and retrospective values of each field side by side.
selected_gsp = pvlive.loc[pvlive["gsp_id"] == GSP_ID]
compared_fields = ["generation_MW", "installedcapacity_MWp", "yield_MW_per_MWp", "site_count"]
pvlive_split = selected_gsp.pivot(
    index="datetime_GMT",
    columns="source",
    values=compared_fields,
).reset_index()
# Flatten the two-level (field, source) labels into "<source>_<field>". The
# column restored by reset_index has an empty source level and keeps its name.
pvlive_split.columns = [
    f"{source}_{field}" if source else field
    for field, source in pvlive_split.columns
]
pvlive_split.head()
| datetime_GMT | intraday_generation_MW | retrospective_generation_MW | intraday_installedcapacity_MWp | retrospective_installedcapacity_MWp | intraday_yield_MW_per_MWp | retrospective_yield_MW_per_MWp | intraday_site_count | retrospective_site_count | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 2016-09-01 00:00:00 | 0.0 | 0.0 | 11548.01 | 11540.967 | 0.0 | 0.0 | NaN | 7031.0 |
| 1 | 2016-09-01 00:30:00 | 0.0 | 0.0 | 11548.01 | 11546.736 | 0.0 | 0.0 | NaN | 7031.0 |
| 2 | 2016-09-01 01:00:00 | 0.0 | 0.0 | 11548.01 | 11546.736 | 0.0 | 0.0 | NaN | 7031.0 |
| 3 | 2016-09-01 01:30:00 | 0.0 | 0.0 | 11548.01 | 11546.736 | 0.0 | 0.0 | NaN | 7031.0 |
| 4 | 2016-09-01 02:00:00 | 0.0 | 0.0 | 11548.01 | 11546.736 | 0.0 | 0.0 | NaN | 7031.0 |
# Scatter intraday yield against retrospective yield with an ordinary
# least-squares trendline fitted by plotly express.
fig = px.scatter(
    pvlive_split,
    x="retrospective_yield_MW_per_MWp",
    y="intraday_yield_MW_per_MWp",
    trendline="ols",
    width=600,
    height=600,
)
# Pull the fitted model back out of the figure and print its full summary
# before rendering the chart.
trendline = px.get_trendline_results(fig)
ols_fit = trendline.px_fit_results[0]
print(ols_fit.summary())
fig.show()
OLS Regression Results
==============================================================================
Dep. Variable: y R-squared: 0.994
Model: OLS Adj. R-squared: 0.994
Method: Least Squares F-statistic: 2.131e+07
Date: Wed, 21 Jun 2023 Prob (F-statistic): 0.00
Time: 14:37:39 Log-Likelihood: 3.6353e+05
No. Observations: 118163 AIC: -7.271e+05
Df Residuals: 118161 BIC: -7.270e+05
Df Model: 1
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
const 0.0010 3.86e-05 26.522 0.000 0.001 0.001
x1 0.9666 0.000 4616.466 0.000 0.966 0.967
==============================================================================
Omnibus: 19912.401 Durbin-Watson: 1.965
Prob(Omnibus): 0.000 Jarque-Bera (JB): 239662.723
Skew: 0.445 Prob(JB): 0.00
Kurtosis: 9.920 Cond. No. 6.51
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# Revision = retrospective minus intraday, so a positive value means the
# retrospective estimate is higher than the intraday one.
pvlive_split["revision_MW"] = pvlive_split["retrospective_generation_MW"].sub(
    pvlive_split["intraday_generation_MW"]
)
pvlive_split["abs_revision_MW"] = pvlive_split["revision_MW"].abs()

# Calendar keys used by the groupings below.
timestamps = pvlive_split["datetime_GMT"]
# Casting to month resolution truncates each timestamp to the start of its month.
pvlive_split["year_month"] = timestamps.to_numpy().astype("datetime64[M]")
pvlive_split["time_of_day"] = timestamps.dt.time
pvlive_split["month"] = timestamps.dt.month
# Summarise the revision per calendar month: worst absolute revision, mean
# absolute revision, and mean signed revision (the bias).
revision_by_month = (
    pvlive_split
    .groupby("year_month", as_index=False)
    .agg(
        max_abs_revision_MW=pd.NamedAgg(column="abs_revision_MW", aggfunc="max"),
        mean_abs_revision_MW=pd.NamedAgg(column="abs_revision_MW", aggfunc="mean"),
        mean_revision_MW=pd.NamedAgg(column="revision_MW", aggfunc="mean"),
    )
)
revision_by_month.head()
| year_month | max_abs_revision_MW | mean_abs_revision_MW | mean_revision_MW | |
|---|---|---|---|---|
| 0 | 2016-09-01 | 877.19 | 113.631427 | -8.337152 |
| 1 | 2016-10-01 | 841.70 | 78.350946 | 38.352156 |
| 2 | 2016-11-01 | 883.73 | 65.528974 | 43.490655 |
| 3 | 2016-12-01 | 689.59 | 41.974354 | 18.979062 |
| 4 | 2017-01-01 | 845.56 | 46.823630 | 15.545162 |
# Plot the three monthly revision metrics as separate lines on one chart.
monthly_metrics = ["max_abs_revision_MW", "mean_abs_revision_MW", "mean_revision_MW"]
fig = px.line(revision_by_month, x="year_month", y=monthly_metrics, height=500)
fig.show()
# Same monthly summary as above, restricted to the rows whose time of day is
# exactly 12:30 (the notebook's "midday").
is_midday = pvlive_split["time_of_day"] == time(12, 30)
midday_revision_by_month = (
    pvlive_split[is_midday]
    .groupby("year_month", as_index=False)
    .agg(
        max_abs_revision_MW=pd.NamedAgg(column="abs_revision_MW", aggfunc="max"),
        mean_abs_revision_MW=pd.NamedAgg(column="abs_revision_MW", aggfunc="mean"),
        mean_revision_MW=pd.NamedAgg(column="revision_MW", aggfunc="mean"),
    )
)
midday_revision_by_month.head()
| year_month | max_abs_revision_MW | mean_abs_revision_MW | mean_revision_MW | |
|---|---|---|---|---|
| 0 | 2016-09-01 | 877.19 | 260.914000 | -63.262000 |
| 1 | 2016-10-01 | 841.70 | 258.484516 | 199.638710 |
| 2 | 2016-11-01 | 883.73 | 288.006167 | 237.855833 |
| 3 | 2016-12-01 | 689.59 | 182.517226 | 120.959871 |
| 4 | 2017-01-01 | 653.97 | 177.477097 | 81.133290 |
# Plot the three midday-only monthly revision metrics on one chart.
midday_metrics = ["max_abs_revision_MW", "mean_abs_revision_MW", "mean_revision_MW"]
fig = px.line(midday_revision_by_month, x="year_month", y=midday_metrics, height=500)
fig.show()
# Summarise the revision per (calendar month, time of day) pair, pooling all
# years together.
revision_by_month_and_sp = (
    pvlive_split
    .groupby(["month", "time_of_day"], as_index=False)
    .agg(
        max_abs_revision_MW=pd.NamedAgg(column="abs_revision_MW", aggfunc="max"),
        mean_abs_revision_MW=pd.NamedAgg(column="abs_revision_MW", aggfunc="mean"),
        mean_revision_MW=pd.NamedAgg(column="revision_MW", aggfunc="mean"),
    )
)
revision_by_month_and_sp.head()
| month | time_of_day | max_abs_revision_MW | mean_abs_revision_MW | mean_revision_MW | |
|---|---|---|---|---|---|
| 0 | 1 | 00:00:00 | 0.0 | 0.0 | 0.0 |
| 1 | 1 | 00:30:00 | 0.0 | 0.0 | 0.0 |
| 2 | 1 | 01:00:00 | 0.0 | 0.0 | 0.0 |
| 3 | 1 | 01:30:00 | 0.0 | 0.0 | 0.0 |
| 4 | 1 | 02:00:00 | 0.0 | 0.0 | 0.0 |
# Lay the mean signed revision out as a month x time-of-day grid.
# `values` is passed as a list so the columns keep two levels
# (metric, time_of_day); the heatmap below reads the times from level 1.
grid_values = ["mean_revision_MW"]
revision_by_month_and_sp_split = revision_by_month_and_sp.pivot(
    index="month",
    columns="time_of_day",
    values=grid_values,
)
revision_by_month_and_sp_split
| mean_revision_MW | |||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| time_of_day | 00:00:00 | 00:30:00 | 01:00:00 | 01:30:00 | 02:00:00 | 02:30:00 | 03:00:00 | 03:30:00 | 04:00:00 | 04:30:00 | ... | 19:00:00 | 19:30:00 | 20:00:00 | 20:30:00 | 21:00:00 | 21:30:00 | 22:00:00 | 22:30:00 | 23:00:00 | 23:30:00 |
| month | |||||||||||||||||||||
| 1 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 3 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.000000 | ... | -0.026249 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 4 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.000000 | ... | -10.849976 | -1.432256 | -0.000372 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 5 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -0.002343 | -0.661044 | ... | -28.825703 | -23.410022 | -6.924445 | -0.369512 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 6 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -0.111293 | -14.321015 | ... | -7.385139 | -14.526541 | -12.547694 | -4.501237 | -0.005383 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 7 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -0.003098 | -0.660383 | ... | -13.083011 | -19.940567 | -8.975662 | -2.327027 | -0.000283 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 8 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | -0.000004 | ... | -17.165685 | -5.629013 | -0.986651 | -0.000002 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 9 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.000000 | ... | -0.488130 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 10 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 11 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 12 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
12 rows × 48 columns
# Heatmap of the mean signed revision by month (rows) and time of day
# (columns), with the colour range centred on zero.
heatmap_labels = {
    "x": "Time of day",
    "y": "Month",
    "color": "Mean revision (MW)",
}
# Axis tick values come straight from the pivoted frame: times of day from the
# second column level, months from the index.
times_of_day = revision_by_month_and_sp_split.columns.get_level_values(1)
months = revision_by_month_and_sp_split.index.get_level_values(0)
fig = px.imshow(
    revision_by_month_and_sp_split,
    labels=heatmap_labels,
    x=times_of_day,
    y=months,
    aspect="auto",
    color_continuous_scale="jet",
    color_continuous_midpoint=0.0,
    height=700,
)
fig.show()
# Mean signed revision over every row of the comparison table.
overall_mean_revision = pvlive_split["revision_MW"].mean()
print(f"Mean revision across all HH is: {overall_mean_revision:.1f} MW")
Mean revision across all HH is: 30.2 MW
# Mean signed revision over the 12:30 rows only, each row weighted equally.
midday_rows = pvlive_split[pvlive_split["time_of_day"] == time(12, 30)]
midday_mean_revision = midday_rows["revision_MW"].mean()
print(f"Mean revision at midday is: {midday_mean_revision:.1f} MW")
Mean revision at midday is: 125.0 MW
# NOTE: this averages the per-month midday means, so every calendar month
# carries equal weight regardless of how many days it has. It is not the same
# quantity as the mean over all individual 12:30 rows.
mean_of_monthly_means = midday_revision_by_month["mean_abs_revision_MW"].mean()
print(f"Mean absolute revision at midday is: {mean_of_monthly_means:.1f} MW")
Mean absolute revision at midday is: 247.6 MW
# Show the row with the largest positive revision, i.e. where the
# retrospective estimate most exceeds the intraday one. The heading is a
# plain string: it has no placeholders, so the f-prefix was redundant.
print("Biggest upwards revision is:")
pvlive_split.sort_values("revision_MW", ascending=False).head(1)
Biggest upwards revision is:
| datetime_GMT | intraday_generation_MW | retrospective_generation_MW | intraday_installedcapacity_MWp | retrospective_installedcapacity_MWp | intraday_yield_MW_per_MWp | retrospective_yield_MW_per_MWp | intraday_site_count | retrospective_site_count | revision_MW | abs_revision_MW | year_month | time_of_day | month | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 83399 | 2021-06-04 11:30:00 | 5679.93 | 7169.81 | 13510.51 | 13510.51 | 0.420408 | 0.530684 | 1067.0 | 3796.0 | 1489.88 | 1489.88 | 2021-06-01 | 11:30:00 | 6 |
# Show the row with the most negative revision, i.e. where the retrospective
# estimate falls furthest below the intraday one. The heading is a plain
# string: it has no placeholders, so the f-prefix was redundant.
print("Biggest downwards revision is:")
pvlive_split.sort_values("revision_MW", ascending=True).head(1)
Biggest downwards revision is:
| datetime_GMT | intraday_generation_MW | retrospective_generation_MW | intraday_installedcapacity_MWp | retrospective_installedcapacity_MWp | intraday_yield_MW_per_MWp | retrospective_yield_MW_per_MWp | intraday_site_count | retrospective_site_count | revision_MW | abs_revision_MW | year_month | time_of_day | month | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 104860 | 2022-08-25 14:00:00 | 4629.83 | 3215.75 | 13952.85 | 13952.85 | 0.33182 | 0.230473 | 1458.0 | 4340.0 | -1414.08 | 1414.08 | 2022-08-01 | 14:00:00 | 8 |